set more 1

/*
File:	cepr_basic_topcode_lognormal.do, CEPR ORG Version 0.9
Date:	Nov 21, 2006
	Feb 15, 2008
	Feb 10, 2009
	Jan 4, 2010
	Feb 22, 2011 CEPR ORG Version 1.6
	Jan 4, 2012 CEPR ORG Version 1.7
	Dec 20, 2013
	Mar 12, 2015, CEPR ORG Version 2.0
	Apr  1, 2015, CEPR ORG Version 2.0.1
	March 1, 2016, CEPR ORG Version 2.1
	Oct 12, 2016, CEPR ORG Version 2.1.1
	Feb 9, 2017, CEPR ORG Version 2.2
	Apr 21, 2017, CEPR ORG Version 2.2.1
	Apr 10, 2018, CEPR ORG Version 2.3
	Mar 22, 2019, CEPR ORG Version 2.4
	Jul 24, 2019, CEPR ORG Version 2.4.1
	Feb 05, 2020, CEPR ORG Version 2.5
	
Desc:	Estimates mean above the top-code for weekly earnings
        using the lognormal distribution
Note:	See copyright notice at end of program.
*/

/* Determine data year */
local year=year in 1

/* top-code by year */

if 1979<=`year' & `year'<=1988 {
global topcode=999 /* top-code is $999 per week */
}
if 1989<=`year' & `year'<=1997 {
global topcode=1923 /* top-code is $1,923 per week */
}
if 1998<=`year' & `year'<=2020 {
global topcode=2884 /* top-code is $2,884 per week */
}
global T=ln($topcode) /* to simplify calculation below */

/* mark observations with top-coded weekly earnings */

if 1994<=`year' & `year'<=2020 {
tempvar earnwke
gen `earnwke'=prernwa/100
gen byte tc=0 if empl==1 & selfemp==0 & prernwa~=. & peernhry==2
replace tc=1 if empl==1 & selfemp==0 & peernhry==2 & /*
*/ $topcode<=`earnwke' & `earnwke'~=. 
/* weekly earnings recode; two implied decimal places */ 
}
lab var tc "Weekly earnings topcoded"
notes tc: Weekly earnings topcodes are: 1979-88: $999; 1989-97: $1923; /*
*/ 1998- $2884

/* Description of procedure for estimating mean above top-code 

   Step 1: estimate mean, s.d. of uncensored distribution 
 
If
1. y*~N[mu,sigma^2] (uncensored distribution)
2. y=a if a<=y* 
3. y=y* if y*<a
then 
1. E[y]=(1-PHI)*a + PHI*(mu + sigma*lamda)
2. Var[y]=sigma^2 * (1-PHI) * [(1-delta)+(alpha-lamda)^2*PHI]
where
1. PHI (uppercase) is standard normal cumulative density function
2. PHI=PHI[(a-mu)/sigma]=PHI(alpha)=Prob(y*>a)
3. lamda= -phi(alpha)/PHI(alpha)
4. phi (lowercase) is standard normal probability density function
5. delta=lamda^2 - lamda*alpha
therefore

See William H. Greene, Econometric Analysis, 5th Edition, pp. 763-64.

   Step 2: use mean, s.d. of uncensored distribution to estimate mean
   of the uncensored distribution above the top-code
 
E[y*|y>a]=mu + [(sigma*phi(alpha))/(1 - PHI(alpha)]
where
alpha=(a - mu)/sigma

See Greene, pp. 759-60.
*/

capture program drop tcln
program define tcln
version 9.0
while "`1'"~="" {
*
* syntax: tcln input1 input2
* where input1 is if statement for all, male, or female
* 	input2 is suffix for all, male, female
*
* Step 1
*
* a. convert earnings per week to dollars (from cents)
*
capture drop `earnwke'
tempvar earnwke
gen `earnwke'=prernwa/100
*
*
* b. calculate share of weekly earnings at or above the top-code (PHI)
* universe is all those not paid by hour and reporting weekly earnings
* not paid by hour is paidhre==2 in NBER data, ==0 in modified data
*
sum tc [aw=orgwgt] if `1'
local PHI=1-r(mean)
*
* c. calculate other needed values
* top-coding implies right-censoring
*
* take natural log of weekly earnings since procedure
* assumes weekly earnings are log-normally distributed
*
tempvar lnwke
gen `lnwke'=ln(`earnwke') if empl==1 & selfemp==0 & `1'
sum `lnwke' [aw=orgwgt] if tc~=. & `1' /* only reporting weekly earnings */
drop `earnwke'
local X=r(mean) /* mean, calculated using top-code (E[y] above) */
local SD=r(sd) /* standard deviation calculated using top-code */
local alpha=invnorm(`PHI')
local lamda=-normden(`alpha')/norm(`alpha')
*
* d. calculate estimates of true mean and standard deviation
local lsigma=($T-`X')/(`PHI'*(`alpha'-`lamda'))
local lmu   =$T - `alpha'*`lsigma'
*
* e. convert from natural logs back to dollars per week
local mX=exp(`X')
local mu=exp(`lmu')
local mT=exp($T)
local sigma=exp(`lsigma')
*
* Step 2
*
* a. calculate mean above top-code
* calculating mean above top-code implies left-truncation
*
local halpha=($T-`lmu')/`lsigma'
local hlamda=normden(`halpha')/(1 - norm(`halpha'))
local mtc=`lmu' + `lsigma'*`hlamda'
*
* b. convert from natural logs back to dollars per week
global matcln`2' : display %6.0f exp(`mtc')
*
mac shift 2
}
end

tcln "female~=." a "female==0" m "female==1" f

/* 	
Copyright 2020 CEPR and John Schmitt

This file is part of the cepr_org_master.do program. This file and all
programs referenced in it are free software. You can redistribute the
program or modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
USA.
*/
